{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import tensorflow as tf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compare speed: Python list vs NumPy ops\n",
    "\n",
    "Let's implement a standard deviation function and compare the computation time for 10 million numbers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 10 ** 7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Implementation using plain Python (no numpy)\n",
    "def std(x):\n",
    "    \"\"\"Return the population standard deviation of a sequence of numbers.\n",
    "\n",
    "    Args:\n",
    "        x: Sized, re-iterable collection of numbers (list, tuple, range, ...).\n",
    "            len(x) is used and x is iterated twice, so a one-shot iterator\n",
    "            is not supported.\n",
    "\n",
    "    Returns:\n",
    "        float: square root of the mean squared deviation from the mean\n",
    "        (divides by len(x), not len(x) - 1).\n",
    "\n",
    "    Raises:\n",
    "        ZeroDivisionError: if x is empty.\n",
    "    \"\"\"\n",
    "    count = len(x)\n",
    "    x_mean = sum(x) / count\n",
    "    # Generator expression: no temporary n-element list is materialized.\n",
    "    variance = sum((v - x_mean) ** 2 for v in x) / count\n",
    "    return variance ** 0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.32 s, sys: 131 ms, total: 1.45 s\n",
      "Wall time: 1.45 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "2886751.3459482347"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time std(range(n))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Implementation using vectorized numpy operations\n",
    "def std_np(x):\n",
    "    \"\"\"Return the population standard deviation of x using numpy.\n",
    "\n",
    "    Args:\n",
    "        x: Array-like of real numbers (ndarray, list, range, ...).\n",
    "\n",
    "    Returns:\n",
    "        numpy.float64: square root of the mean squared deviation from the\n",
    "        mean, taken over all elements (divides by the element count).\n",
    "    \"\"\"\n",
    "    values = np.asarray(x)\n",
    "    # Accumulate the mean in float64: an integer-typed np.sum can silently\n",
    "    # wrap around for large integer inputs.\n",
    "    x_mean = values.mean(dtype=np.float64)\n",
    "    return np.mean((values - x_mean) ** 2) ** 0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 162 ms, sys: 48.3 ms, total: 211 ms\n",
      "Wall time: 106 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "2886751.3459480824"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time std_np(np.arange(n))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "As we can see, the NumPy implementation is much faster than the one implemented on a Python list. There is also a built-in function in NumPy to compute the standard deviation. Verify that the standard deviation computed by all three techniques gives the same result."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 333 ms, sys: 16.2 ms, total: 349 ms\n",
      "Wall time: 59 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "2886751.3459480824"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Built-in numpy implementation, timed on the same n as the versions above\n",
    "%time np.std(np.arange(n))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2886751.345948096]\n",
      "CPU times: user 583 ms, sys: 19.1 ms, total: 602 ms\n",
      "Wall time: 85.7 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Build the ops in a private graph so that re-running this cell does not keep\n",
    "# adding nodes to the global default graph. The tf.compat.v1 names keep the\n",
    "# placeholder/Session API reachable on TensorFlow 2.x as well.\n",
    "with tf.Graph().as_default():\n",
    "    n_input = tf.compat.v1.placeholder(dtype=tf.float64)\n",
    "    x = tf.range(0, n_input)\n",
    "    x_mean = tf.reduce_mean(x)\n",
    "    x_std = tf.sqrt(tf.reduce_mean(tf.square(x - x_mean)))\n",
    "    with tf.compat.v1.Session() as sess:\n",
    "        print(sess.run([x_std], feed_dict={n_input: n}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Matrix multiplication\n",
    "\n",
    "#### Usecase - solve Normal Equations\n",
    "\n",
    "http://mlwiki.org/index.php/Normal_Equation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "W:  [ 2.3 -5.7  8.9]\n"
     ]
    }
   ],
   "source": [
    "# Synthetic, noiseless linear data y = X.dot(W) with no intercept term,\n",
    "# used to check that the normal equation recovers W exactly.\n",
    "np.random.seed(1)\n",
    "W = np.array([2.3, -5.7, 8.9])  # 1-D weight vector; transposing it is a no-op\n",
    "X = np.random.random((10, 3))\n",
    "y = X.dot(W)\n",
    "print(\"W: \", W)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "y:  [-3.14568107  0.68066983  1.98993286  4.94832331 -4.29127963  4.13577737\n",
      "  6.32033813  6.60215509 -2.32666945  6.93726089]\n"
     ]
    }
   ],
   "source": [
    "print(\"y: \", y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 2.3, -5.7,  8.9])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Normal equation: (X^T X) w = X^T y. Solve the linear system directly\n",
    "# instead of forming the explicit inverse, which is slower and less accurate.\n",
    "W_estimate = np.linalg.solve(X.T.dot(X), X.T.dot(y))\n",
    "W_estimate"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Covariance Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.27702631, 0.36855193, 0.64431478],\n",
       "       [0.78019793, 0.50860458, 0.52375554],\n",
       "       [0.84079088, 0.36703687, 0.67039217],\n",
       "       [0.83824478, 0.68695113, 0.10454645],\n",
       "       [0.43739591, 0.312447  , 0.25789323]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.random.seed(1230)\n",
    "X = np.random.ranf((5, 3))\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.05438659,  0.01918617, -0.00915189],\n",
       "       [ 0.01918617,  0.01840192, -0.01705645],\n",
       "       [-0.00915189, -0.01705645,  0.04950637]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Population covariance by hand: center each column, then X0^T X0 / n_samples.\n",
    "# The row count gets its own name so that the benchmark size `n` defined\n",
    "# earlier in the notebook is not overwritten.\n",
    "n_samples = X.shape[0]\n",
    "X0 = X - X.mean(axis=0)\n",
    "X0.T.dot(X0) / n_samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.05438659,  0.01918617, -0.00915189],\n",
       "       [ 0.01918617,  0.01840192, -0.01705645],\n",
       "       [-0.00915189, -0.01705645,  0.04950637]])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.cov(X, ddof=0, rowvar=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.05438658851819618"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.var(X[:, 0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.05438659, 0.01918617],\n",
       "       [0.01918617, 0.01840192]])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.cov(X[:,0], X[:,1], ddof=0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Eigenvalue Decomposition\n",
    "\n",
    "Read about eigenvalue decomposition, SVD, and PCA: https://www.cc.gatech.edu/~dellaert/pubs/svd-note.pdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.06798324,  0.02398272, -0.01143987],\n",
       "       [ 0.02398272,  0.0230024 , -0.02132056],\n",
       "       [-0.01143987, -0.02132056,  0.06188297]])"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "cx = np.cov(X, rowvar=False)\n",
    "cx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([0.09171233, 0.05316521, 0.00799107]),\n",
       " array([[-0.7030196 , -0.64191217, -0.3061245 ],\n",
       "        [-0.42282087,  0.03115894,  0.90567744],\n",
       "        [ 0.57182686, -0.76614482,  0.29331921]]))"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# cx is a symmetric covariance matrix, so use eigh: it returns real\n",
    "# eigenvalues and orthonormal eigenvectors (as columns of v).\n",
    "# eigh orders eigenvalues ascending; flip to descending (largest first).\n",
    "# The sign of each eigenvector is arbitrary and may vary between runs/routines.\n",
    "e, v = np.linalg.eigh(cx)\n",
    "e, v = e[::-1], v[:, ::-1]\n",
    "e, v"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.35770485, -0.08016637,  0.20413434],\n",
       "       [ 0.14546677,  0.05988628,  0.0835751 ],\n",
       "       [ 0.20605972, -0.08168143,  0.23021173],\n",
       "       [ 0.20351361,  0.23823282, -0.33563398],\n",
       "       [-0.19733525, -0.1362713 , -0.1822872 ]])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Z = X - X.mean(axis=0)\n",
    "Z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 2.22044605e-17, -4.44089210e-17, -2.22044605e-17])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Z.mean(axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "U, D, V = np.linalg.svd(Z)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.66387938,  0.15335678, -0.54128459,  0.37391911,  0.32084477],\n",
       "       [ 0.13174705, -0.33728933, -0.19140748, -0.53739058,  0.73719276],\n",
       "       [-0.03518989, -0.67481681,  0.38890969,  0.58453227,  0.22462241],\n",
       "       [ 0.71940261,  0.29042389, -0.30770774,  0.47836284,  0.2731278 ],\n",
       "       [-0.15208039,  0.56832548,  0.65149012,  0.02978059,  0.47807055]])"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "U"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.60568087, 0.46115164, 0.17878555])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.7030196 ,  0.42282087, -0.57182686],\n",
       "       [-0.64191217,  0.03115894, -0.76614482],\n",
       "       [ 0.3061245 , -0.90567744, -0.29331921]])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "V"
   ]
  },
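  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### U and V are unitary matrices\n",
    "\n",
    "Note: `np.linalg.svd` returns the right singular vectors already transposed, so `V` here holds $V^T$."
   ]
  },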
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.00000000e+00, -1.09255124e-16,  1.25679661e-17,\n",
       "         1.98321425e-16, -1.01653038e-16],\n",
       "       [-1.09255124e-16,  1.00000000e+00, -2.54001297e-16,\n",
       "         9.28867142e-17,  1.24526333e-16],\n",
       "       [ 1.25679661e-17, -2.54001297e-16,  1.00000000e+00,\n",
       "         1.18816684e-16,  3.68935290e-16],\n",
       "       [ 1.98321425e-16,  9.28867142e-17,  1.18816684e-16,\n",
       "         1.00000000e+00, -1.65612590e-16],\n",
       "       [-1.01653038e-16,  1.24526333e-16,  3.68935290e-16,\n",
       "        -1.65612590e-16,  1.00000000e+00]])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "U.dot(U.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.00000000e+00, -1.17976663e-16,  4.11484180e-16],\n",
       "       [-1.17976663e-16,  1.00000000e+00,  1.18705807e-16],\n",
       "       [ 4.11484180e-16,  1.18705807e-16,  1.00000000e+00]])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "V.dot(V.T)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Vectors of U and V are orthogonal"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-1.249000902703301e-16, 2.7755575615628914e-17, -2.7755575615628914e-16)"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "U[0].dot(U[1]), U[0].dot(U[2]), U[1].dot(U[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-1.1102230246251565e-16, 4.163336342344337e-16, 1.1102230246251565e-16)"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "V[0].dot(V[1]), V[0].dot(V[2]), V[1].dot(V[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_0 = np.zeros_like(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.60568087, 0.        , 0.        ],\n",
       "       [0.        , 0.46115164, 0.        ],\n",
       "       [0.        , 0.        , 0.17878555],\n",
       "       [0.        , 0.        , 0.        ],\n",
       "       [0.        , 0.        , 0.        ]])"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.fill_diagonal(X_0, D)\n",
    "X_0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.35770485, -0.08016637,  0.20413434],\n",
       "       [ 0.14546677,  0.05988628,  0.0835751 ],\n",
       "       [ 0.20605972, -0.08168143,  0.23021173],\n",
       "       [ 0.20351361,  0.23823282, -0.33563398],\n",
       "       [-0.19733525, -0.1362713 , -0.1822872 ]])"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "U.dot(X_0).dot(V)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
